/*
   Copyright (C) 2007,2008 Qualcomm Incorporated. All rights reserved.
   Written by Max Krasnyansky <maxk@qualcomm.com>

   This file is part the Bones library. It is licensed under
   Boost Software License - Version 1.0 - August 17th, 2003

   Permission is hereby granted, free of charge, to any person or organization
   obtaining a copy of the software and accompanying documentation covered by
   this license (the "Software") to use, reproduce, display, distribute,
   execute, and transmit the Software, and to prepare derivative works of the
   Software, and to permit third-parties to whom the Software is furnished to
   do so, all subject to the following:

   The copyright notices in the Software and this entire statement, including
   the above license grant, this restriction and the following disclaimer,
   must be included in all copies of the Software, in whole or in part, and
   all derivative works of the Software, unless such copies or derivative
   works are solely in the form of machine-executable object code generated by
   a source language processor.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
   SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
   FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
   DEALINGS IN THE SOFTWARE.
*/

/**
 * @file bones/cpu.h
 * CPU library. Provides things like reading cpu cycle counter (tsc), 
 * converting tsc to microseconds, etc.
 */

#ifndef BONES2_CPU_H
#define BONES2_CPU_H

#include <stdint.h>
#include <assert.h>

#include <bones/compiler.h>

namespace bones {
namespace cpu {

// Functions that start with double underscore are processor specific

// CPU frequency in kilohertz.
// NOTE(review): presumably filled in by calibrate(); the definition lives
// outside this header -- confirm.
extern unsigned long _khz;

// Overhead of various functions.
// NOTE(review): the units and the way these values are measured are not
// visible in this header -- confirm against the definition of _overhead.
extern struct overhead {
	unsigned long nanosleep; // presumably the overhead of cpu::nanosleep()
} _overhead;

// Capabilities of the current process.
// _caps is treated as a bit set; every CAPS_* enumerator is the bit
// position (not a mask) of one capability inside _caps.
extern unsigned long _caps;
enum {
	CAPS_IRQ_LOCK = 0,	// bit 0: checked by lock() / unlock()
};

/**
 * Check whether the current process has capability @a cap.
 * @param cap one of the CAPS_* bit positions
 * @return true if the bit at position @a cap is set in _caps,
 * false otherwise (including when @a cap is out of range)
 */
static inline bool capable(unsigned int cap)
{
	// CAPS_* values are bit indexes. A plain "_caps & cap" is always
	// zero for CAPS_IRQ_LOCK (0), which would turn lock()/unlock() into
	// permanent no-ops, so test the bit at position cap instead.
	// Positions beyond the width of _caps are reported as "not capable"
	// rather than performing an over-wide shift.
	if (cap >= sizeof(_caps) * 8)
		return false;

	return ((_caps >> cap) & 1UL) != 0;
}

#if (defined(__i386__) || defined(__x86_64__))
#include <bones/cpu-x86.h>
#else
#error Unsupported CPU
#endif

/**
 * Calibrate CPU.
 * This function detects CPU frequency and initializes the CPU library.
 * Call this function before using any other function of this library.
 * @warning Timing services are reliable only if CPU clock frequency
 * does not change.
 * @return NOTE(review): presumably true on success and false on failure;
 * the implementation is not visible from this header -- confirm.
 */
bool calibrate();

/**
 * Read the CPU cycle counter (tsc).
 * This is the non-serializing variant, i.e. the read may complete
 * before previous instructions have finished.
 * Example:
 *    @code
 *    uint64_t start = cpu::tsc();
 *    @endcode
 * @return current value of the cycle counter
 */
static inline uint64_t tsc(void)
{
	// Processor specific read, see the included CPU header.
	const uint64_t cycles = __tsc();

	return cycles;
}

/**
 * Sleep for @a nsec nanoseconds.
 * This is a busy sleep, not a regular OS nanosleep().
 * @param nsec Number of nanoseconds to sleep for
 */
void nanosleep(unsigned long nsec);

/**
 * Sleep for @a usec microseconds.
 * This is a busy sleep, not a regular OS usleep().
 * Durations whose nanosecond equivalent does not fit in unsigned long
 * are slept in several consecutive nanosleep() calls.
 * @param usec Number of microseconds to sleep for
 */
static inline void usleep(unsigned long usec)
{
	// Largest microsecond count that can be converted to nanoseconds
	// without wrapping unsigned long (32 bits wide on i386, where a
	// plain "usec * 1000" would wrap for sleeps longer than ~4.29 s).
	const unsigned long max_chunk = ~0UL / 1000;

	while (usec > max_chunk) {
		nanosleep(max_chunk * 1000);
		usec -= max_chunk;
	}

	nanosleep(usec * 1000);
}

// Scaled math for converting tsc to nsec and usec.
// Floating point version
//     nsec = tsc * (1000000.0 / khz)
// scaled version
//     nsec = tsc * (1000000 * SCALE / khz) / SCALE
// If SCALE is a power of two, the last division can be replaced with a shift
//     nsec = tsc * ((1000000 << SCALE_SHIFT) / khz) >> SCALE_SHIFT
//
// Scale factors are computed by cpu::calibrate()
extern struct scale {
	uint64_t tsc2usec; // multiplier used by tsc2usec(), paired with TSC2USEC_SCALE_SHIFT
	uint64_t tsc2nsec; // multiplier used by tsc2nsec(), paired with TSC2NSEC_SCALE_SHIFT
	uint64_t nsec2tsc; // multiplier used by nsec2tsc(), paired with NSEC2TSC_SCALE_SHIFT
	uint64_t usec2tsc; // multiplier used by usec2tsc(), paired with USEC2TSC_SCALE_SHIFT
} _scale;

// Right-shift amounts applied after multiplying by the matching _scale
// factor in the conversion functions of this header.
// These shifts provide good precision (almost match floating point)
// but require full 64bits
enum {
	TSC2NSEC_SCALE_SHIFT = 20,
	TSC2USEC_SCALE_SHIFT = 30,
	NSEC2TSC_SCALE_SHIFT = 30,
	USEC2TSC_SCALE_SHIFT = 20,
};

/**
 * Convert tsc to nanoseconds.
 * @param tsc number of cycles
 */
static inline uint64_t tsc2nsec(uint64_t tsc)
{
	return (tsc * _scale.tsc2nsec) >> TSC2NSEC_SCALE_SHIFT;
}

/**
 * Convert tsc to microseconds.
 * @param tsc number of cycles
 */
static inline uint64_t tsc2usec(uint64_t tsc)
{
	return (tsc * _scale.tsc2usec) >> TSC2USEC_SCALE_SHIFT;
}

/**
 * Convert nanoseconds to tsc.
 * @param nsec nanoseconds
 * @warning due to rounding errors it's better to always convert
 * one way. In other words not to mix tsc2nsec and nsec2tsc.
 */
static inline uint64_t nsec2tsc(uint64_t nsec)
{
	return (nsec * _scale.nsec2tsc) >> NSEC2TSC_SCALE_SHIFT;
}

/**
 * Convert microseconds to CPU tsc.
 * @param usec microseconds
 * @warning due to rounding errors it's better to always convert
 * one way. In other words to not mix tsc2usec and usec2tsc.
 */
static inline uint64_t usec2tsc(uint64_t usec)
{
	return (usec * _scale.usec2tsc) >> USEC2TSC_SCALE_SHIFT;
}

/**
 * Microseconds elapsed since an earlier cycle counter reading.
 * @param t cycle counter value obtained earlier, e.g. from tsc()
 * @return microseconds between @a t and now
 */
static inline uint64_t usec_elapsed(uint64_t t)
{
	const uint64_t now = tsc();

	return tsc2usec(now - t);
}

/**
 * Nanoseconds elapsed since an earlier cycle counter reading.
 * @param t cycle counter value obtained earlier, e.g. from tsc()
 * @return nanoseconds between @a t and now
 */
static inline uint64_t nsec_elapsed(uint64_t t)
{
	const uint64_t now = tsc();

	return tsc2nsec(now - t);
}

/**
 * Tell the cpu to relax :).
 * Use this in busy wait loops to conserve power.
 * Thin wrapper around the processor specific __relax().
 */
static inline void relax(void)
{
	// Processor specific hint; presumably provided by the CPU header
	// included above (bones/cpu-x86.h) -- confirm.
	__relax();
}

// CPU locking.
// Use of these functions is _strongly_ discouraged.

// Nested lock counter: incremented by lock() and decremented by unlock(),
// which calls __unlock() only when the counter drops back to zero.
extern volatile unsigned long _locked;

/**
 * Lock the cpu.
 * On x86 and x86-64 this generally just locks interrupts; other
 * platforms may need to do more.
 * Locks nest: every lock() has to be paired with an unlock().
 * Does nothing when the process lacks the CAPS_IRQ_LOCK capability.
 * @warning Do not use system calls inside a locked region.
 * @warning A thread that uses this function must be pinned to a single CPU.
 */
static inline void lock(void)
{
	if (capable(CAPS_IRQ_LOCK)) {
		__lock();
		// Track the nesting depth so unlock() knows when to release.
		++_locked;
	}
}

/**
 * Unlock the cpu.
 * Decrements the nested lock counter and releases the cpu once the
 * counter reaches zero. Does nothing when the process lacks the
 * CAPS_IRQ_LOCK capability.
 * An unlock() without a matching lock() trips an assertion; in NDEBUG
 * builds it is ignored.
 * @see lock()
 */
static inline void unlock(void)
{
	if (!capable(CAPS_IRQ_LOCK))
		return;

	// _locked is unsigned, so an underflow has to be detected before
	// the decrement; a ">= 0" check afterwards can never fail.
	assert(_locked > 0);
	if (_locked == 0)
		return;	// unbalanced unlock() with asserts disabled

	/* We don't need an atomic op here because we're inside the locked
	 * region and are not supposed to be preempted. */
	_locked--;

	if (_locked == 0)
		__unlock();
}

/**
 * Sleep for @a nsec nanoseconds inside locked section.
 * This function unlocks the CPU, does busy sleep and then locks CPU again.
 * It also compensates for the scheduling jitter (if any).
 * @param nsec Number of nanoseconds to sleep for
 */
void nanosleep_locked(unsigned long nsec);

/**
 * Sleep for @a usec microseconds inside locked section.
 * Implemented on top of nanosleep_locked(), which unlocks the CPU, does
 * a busy sleep and then locks the CPU again, compensating for
 * scheduling jitter (if any).
 * Durations whose nanosecond equivalent does not fit in unsigned long
 * are slept in several consecutive nanosleep_locked() calls.
 * @param usec Number of microseconds to sleep for
 */
static inline void usleep_locked(unsigned long usec)
{
	// Largest microsecond count that can be converted to nanoseconds
	// without wrapping unsigned long (32 bits wide on i386, where a
	// plain "usec * 1000" would wrap for sleeps longer than ~4.29 s).
	const unsigned long max_chunk = ~0UL / 1000;

	while (usec > max_chunk) {
		nanosleep_locked(max_chunk * 1000);
		usec -= max_chunk;
	}

	nanosleep_locked(usec * 1000);
}

} // namespace cpu
} // namespace bones

#endif // BONES2_CPU_H
